home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
EnigmA Amiga Run 1997 February
/
EnigmA AMIGA RUN 15 (1997)(G.R. Edizioni)(IT)[!][issue 1997-02][PLANET CD V].iso
/
recent
/
graburl.lha
/
GrabURL
/
rexx
/
GrabURL.rexx
Wrap
OS/2 REXX Batch file
|
1997-01-18
|
23KB
|
694 lines
/******************************************************************************
*
* $VER: GrabURL 1.02 (18.1.97) (c) 1996-97 Serge Emond
*
*******************************************************************************
*
* Url Types:
* F Failed.
* Q Queued
* R Received
* X Error.
* U Unused/Unknown
*
******************************************************************************/
/* Script entry: set interpreter options, install the break/error traps,
   capture the command line and hand control to Main. */
/* Ask hosts for result strings from the commands sent to them. */
options results
/* NOTE(review): presumably raises the return-code threshold at which a host
   command counts as a failure - confirm against the ARexx OPTIONS docs. */
options failat 100
/* Ctrl-C, HALT and I/O errors jump to the labels of the same name. */
signal on break_c
signal on halt
signal on ioerr
/* Keep the raw argument string; Main feeds it to ReadArgs. */
PARSE ARG Arguments
call Main
/* Main finishes through DoFail (which exits); this is only a safety net. */
call DoFail 0
/********************************************************* Default Settings **/
DoDefaults:
   /* ----------------------------------------------------------------
    * Starting values for everything ReadArgs may override.
    * Switches are always filled in by ReadArgs, so they get no
    * default here. Requires UID to be set already (see DoDefines).
    * ---------------------------------------------------------------- */

   /* -- Inputs ------------------------------------------------------ */
   opts.Url.Count    = 0             /* no url given on the command line */
   opts.InFile       = ''            /* no url list file */
   opts.WorkFile     = ''            /* no work file */
   opts.Pattern      = ''            /* no pattern matching */

   /* -- Limits (0 means "no limit" / "no checking") ----------------- */
   opts.Depth        = -1            /* -1 = unlimited recursion depth */
   opts.MaxSize      = 0
   opts.MaxCount     = 0             /* number of files */
   opts.MaxBytes     = 0             /* number of bytes */
   opts.MaxTime      = 0             /* minutes */
   opts.MinSpaceLeft = 0
   opts.Delay        = 0             /* seconds between two grabs */

   /* -- Output ------------------------------------------------------ */
   opts.SaveRoot     = 'Work:Urls/'  /* where grabbed files are stored */

   /* ----------------------------------------------------------------
    * Settings that cannot be changed from the command line.
    * ---------------------------------------------------------------- */

   /* What stays in the url list: 1 = keep the entry, 0 = remove it */
   def.KeepReceived  = 1
   def.KeepFailed    = 1
   def.KeepUnused    = 1

   /* Redirections: 1 = grab the new location given by the server.
      GetUrl also understands 'F' and 'X' (flag the url with that type). */
   def.FollowMoved   = 1             /* permanent move */
   def.FollowTMoved  = 1             /* temporary move */

   /* 1 = save the work file after every change, 0 = only on exit */
   def.Secure        = 1

   /* HTTP request details */
   def.Accept        = '*/*'         /* accept every type of file */
   def.EMail         = ''            /* stay anonymous */
   def.TimeZone      = 300           /* minutes to ADD to local time to get GMT */
   def.PasswordFile  = ''            /* file holding authentication infos */

   /* Saving to disk */
   def.WriteBufSize  = 16*1024       /* write buffer size */
   def.Translate     = '~:[]()'      /* characters replaced in file names ... */
   def.TranslateTo   = copies('_', length(def.Translate))   /* ... by "_" */
   def.TouchOldDirs  = 1             /* 1 = reset date of old directories */

   /* HTML parsing */
   def.TempFile      = 't:GrabURL.' || UID    /* scratch file used when parsing */
   def.ParsePattern  = '(FTP|HTTP)://#?'      /* only ftp and http urls are listed */

   /* External programs, all looked up below def.Path */
   def.Path          = ''
   def.ScanHTML      = def.Path || 'ScanHTML'
   def.GrabHTTP      = 'run <>nil: ' || def.Path || 'GrabHTTP'
   def.UrlManager    = 'run <>nil: ' || def.Path || 'UrlManager'

   /* Where messages go and how progress is shown */
   def.Output        = 'CONSOLE:'
   def.Progress      = 1             /* 1 = display progress in a window */
   def.ExitOnClose   = 0             /* close gadget: 1 = abort everything,
                                        0 = skip the current file only */
   def.Font          = 'topaz.font'  /* progress window font */
   def.FSize         = 8

   /* ARexx port names of the two helper programs */
   gh = 'GH' || UID
   um = 'UM' || UID
   return 0
/**************************************************************** DoDefines **/
DoDefines:
   /* Constants shared by the whole script; must run before DoDefaults,
      which builds file and port names from UID. */
   LF  = '0a'x
   UID = pragma('i')                 /* unique ID */

   /* Put the two support libraries on the library list if missing */
   if ~show('L', "rexxsupport.library") then do
      call AddLib("rexxsupport.library", 0, -30)
   end
   if ~show('L', "rexxdossupport.library") then do
      call AddLib("rexxdossupport.library", 0, -30, 0)
   end

   /* ReadArgs template describing the command line.  Note that the simple
      variable "opts" (this template) is distinct from the "opts." stem that
      receives the parsed values. */
   opts = "Help/S,Url=U/M,Delay/N,Depth=D/N,HeaderOnly=HO/S,"
   opts = opts || "IfModified=IM/S,InFile=IF/K,MaxBytes=MB/N,MaxCount=MC/N,"
   opts = opts || "MaxSize=MS/N,MaxTime=MT/N,MinSpaceLeft=MSL/N,"
   opts = opts || "NoBG/S,NoDirs=ND/S,NoHRef/S,NoSrc/S,NotExists=NE/S,"
   opts = opts || "Pattern=P/K,Query=Q/S,Recursive=R/S,Retry/S,"
   opts = opts || "SaveHeaders=SH/S,SaveRoot=SR/K,Verbose/S,WorkFile=WF/K"
   return 0
/** Main *********************************************************************/
Main:
   /* Program driver: set up, parse the command line, start the helper
      programs, process every queued url and save the work file.
      Never returns to its caller: every path ends in DoFail, which exits. */
   call DoDefines
   call DoDefaults

   /* All messages go through the logical file 'l' (see Log) */
   if ~open('l', def.Output, 'W') then do
      say 'Can''t open output display'
      exit(10)
   end

   /* A lone "?" prints the argument template */
   if Arguments = '?' then do
      call Log(opts)
      call DoFail 0
   end
   if strip(Arguments) = '' then do
      call Log("Nothing to do!")
      call DoFail 10
   end

   /* Parsed values land in the "opts." stem, overriding DoDefaults */
   if ~ReadArgs(Arguments, opts, "opts.") then
      call DoFail(10, 'Error: 'Fault(RC))
   if opts.Help then call DisplayHelp

   /* Do we have something to do?
      The CALL keyword is required: without it "DoFail 0" is sent to the
      current host as a command string and the script carries on. */
   if (opts.Url.Count = 0) & (opts.InFile = '') & (opts.WorkFile = '') then
      call DoFail 0

   call DoInit
   call ReadRealms

   /* ScanHTML really likes stack: make sure we have at least 16384 bytes,
      but never shrink a stack that was already bigger. */
   oldstack = pragma('s',16384)
   if oldstack>16384 then call pragma('s',oldstack)
   drop oldstack

   call OpenPorts
   call AddUrls
   call cmd(um, 'GetInMem')
   if opts.Verbose then call Log umres' urls to process'

   call TheLoop

   /* Purge the entries the user does not want to keep, then save */
   if ~def.KeepReceived then call cmd(um, 'KillType R')
   if ~def.KeepFailed then call cmd(um, 'KillType F')
   call SaveWorkFile
   call DoFail 0
/** Log **********************************************************************/
Log:
   /* Write a message to the output file 'l'.
      Arg 1: text to write.
      Arg 2: 1 = do not terminate the line (more text will follow);
             anything else = write a complete line.
      Always returns 0. */
   PARSE ARG log_text, log_nolf
   select
      when log_nolf = 1 then call writech('l', log_text)
      otherwise call writeln('l', log_text)
   end
   drop log_text log_nolf
   return 0
/** DoFail********************************************************************/
break_c:
   /* Ctrl-C trap (installed with "signal on break_c") */
   say '***BREAK'
   call DoFail 10

DoFail:
   /* Common exit point: optionally log a message, tell both helper
      programs to quit, close the output file and leave the script.
      Arg 1: exit code (defaults to 0).
      Arg 2: message, logged only when non-empty and the exit code is
             not 0.
      Does not return. */
   PARSE ARG fail.rc, fail.msg
   if fail.rc = '' then fail.rc = 0
   /* Test the message that was actually passed in; the previous test used
      a variable that is never assigned, so it was always true. */
   if fail.msg ~= '' & fail.rc ~= 0 then call Log(fail.msg)
   /* Only talk to helpers whose port is really there */
   if show('P', gh) then call cmd(gh, 'Quit')
   if show('P', um) then call cmd(um, 'Quit')
   call close('l')
   exit fail.rc
/** Display Help *************************************************************/
DisplayHelp:
   /* Print the list of command-line options and exit with code 0.
      Option names and abbreviations must match the ReadArgs template
      built in DoDefines. Does not return. */
   call Log "Arguments (Abbrev) <arg>"
   call Log " <#> -> number"
   call Log " <s> -> string"
   call Log
   call Log " URL (U) <s> Url to grab (can have multiple arguments)"
   call Log " InFile (IF) <s> Input file containing one url/line to grab"
   call Log " WorkFile (WF) <s> File to load/save urls to. (Keeping flags)"
   call Log ""
   call Log " Depth (D) <#> Level of recursion (Default: grab indefinitely)"
   call Log " IfModified (IM) Grab only files modified since last grab"
   /* MaxBytes is in the template but was missing from the help */
   call Log " MaxBytes (MB) <#> Maximal number of bytes to grab"
   call Log " MaxCount (MC) <#> Maximal number of file to grab"
   call Log " MaxSize (MS) <#> Maximal size a file can have in order to grab it"
   call Log " MaxTime (MT) <#> Maximal time to download in minutes"
   call Log " MinSpaceLeft (MSL) <#> Minimal space to leave on disk when grabbing"
   call Log " NoBG Don't get background images"
   call Log " NoHRef Don't get referenced files"
   call Log " NoSrc Don't get 'SRC' urls"
   call Log " NotExists (NE) Grab only if the file does not already exists on disk"
   call Log " Pattern (P) <s> AmigaDOS pattern telling which URLs to grab"
   call Log " Query (Q) Allow '?' in urls"
   call Log " Recursive (R) Collect files recursively"
   call Log " Retry Retry files that failed"
   call Log ""
   call Log " Delay <#> Time to wait between each grab (seconds)"
   call Log " HeaderOnly (HO) Grab headers, not the files"
   /* The option is spelled NoDirs in the template, not NoDir */
   call Log " NoDirs (ND) Don't create dirs - put the file in current directory"
   call Log " SaveHeaders (SH) Save the header of each file (.HDR)"
   call Log " SaveRoot (SR) <s> Directory where to put files"
   call Log " Verbose Display more stuff"
   call Log ""
   call Log "Please see docs for more infos."
   call DoFail 0
/** cmd **********************************************************************/
/* Send a string to an ARexx port */
cmd:
   /* Arg 1: name of the ARexx port to address.
      Arg 2: command string to send.
      Returns RC as left by the command. */
   PARSE ARG port, commd
   address value port
   commd
   /* Clean up both scratch variables (the old code dropped "string",
      which is not a variable of this routine, and leaked "commd"). */
   drop port commd
   return RC
/** DoInit - Initialize Vars *************************************************/
DoInit:
   /* Reset the run counters and derive internal units from the parsed
      options. Must run after ReadArgs has filled the "opts." stem. */
   call time('r')                       /* elapsed-time clock back to 0 */
   g.bytes = 0                          /* bytes received so far */
   g.cnt   = 0                          /* urls grabbed so far */
   opts.MaxTimeS = 60 * opts.MaxTime    /* minutes -> seconds */
   opts.TDelay   = opts.Delay * 50      /* seconds -> ticks */
   return 0
/** OpenPorts ****************************************************************/
OpenPorts:
   /* Launch GrabHTTP and UrlManager, wait for their ARexx ports to show
      up and push the initial settings to GrabHTTP.
      Any failure ends the script through DoFail. Returns 0 on success. */

   /* Neither port name may be taken already */
   if show('P', gh) | show('P', um) then
      call DoFail(10, 'Port already in use')

   /* ---- GrabHTTP ---- */
   OP.cmd = def.GrabHTTP || ' PORT ' || gh || ' ACCEPT ' || def.Accept
   OP.cmd = OP.cmd || ' Font ' || def.Font || ' FontSize ' || def.FSize
   OP.cmd = OP.cmd || ' WBuf ' || def.WriteBufSize
   address command OP.cmd
   if RC ~= 0 then call DoFail(10, 'Can''t start GrabHTTP')

   /* Poll for the port: up to 10 checks, 20 ticks (0.4 s) apart */
   do OP.i = 1 to 10
      if show('P', gh) then break
      if OP.i = 10 then call DoFail(10, 'Can''t find GrabHTTP''s port')
      call Delay(20)
   end

   /* SaveRoot must end in ':' or '/' so that a file name can be appended */
   OP.last = right(opts.SaveRoot, 1)
   if opts.SaveRoot ~= '' & OP.last ~= ':' & OP.last ~= '/' then
      opts.SaveRoot = opts.SaveRoot || '/'
   drop OP.last

   /* Make GrabHTTP work inside the SaveRoot directory */
   if cmd(gh, 'SetDir "' || opts.SaveRoot || '"') ~= 0 then
      call DoFail(10, 'Error cd saveroot directory')

   /* Remaining GrabHTTP settings */
   call cmd(gh, 'SetMinSpaceLeft ' || opts.MinSpaceLeft)
   call cmd(gh, 'SetEMail ' || def.EMail)
   if cmd(gh, 'SetTimeZone m ' || def.TimeZone) ~= 0 then
      call DoFail(10, 'Error setting TimeZone')

   /* ---- UrlManager ---- */
   OP.cmd = def.UrlManager' PORT 'um
   address command OP.cmd
   if RC ~= 0 then call DoFail(10, 'Can''t start UrlManager')

   /* Same polling scheme as for GrabHTTP */
   do OP.i = 1 to 10
      if show('P', um) then break
      if OP.i = 10 then call DoFail(10, 'Can''t find UrlManager''s port')
      call Delay(20)
   end

   drop OP.i OP.cmd
   return 0
/** AddUrls ******************************************************************/
/* Priority: CLI urls prevails on InFile, InFile on WorkFile */
AddUrls:
   /* Feed UrlManager from the three possible sources, in this order:
      command-line urls, the InFile list, then the saved WorkFile.
      Returns 0; a refused url or InFile ends the script through DoFail. */

   /* Depth given to every new entry: the Depth option when recursing,
      otherwise 0 */
   if opts.Recursive then au.depth = opts.Depth
   else au.depth = '0'

   /* Command-line urls; anything not starting with HTTP:// is skipped */
   if opts.Url.count ~= 0 then do i = 0 to opts.Url.count-1
      if left(upper(opts.Url.i),7) ~= 'HTTP://' then iterate
      thecmd = 'AddUrl TYPE Q URL "' || opts.Url.i || '" DEPTH ' || au.depth
      /* NOTE(review): only return code 2 is fatal here, while the InFile
         call below treats any non-zero code as fatal - confirm intended. */
      if cmd(um, thecmd) = 2 then call DoFail(10, "Error adding url")
   end
   drop i

   /* Url list file */
   if opts.InFile ~= '' then do
      thecmd = 'ReadFile FILE "' || opts.InFile || '" TYPE Q DEPTH ' || au.depth
      if cmd(um, thecmd) ~= 0 then call DoFail(10, "Error adding InFile")
   end
   drop thecmd au.depth

   /* Previously saved work file, if there is one on disk */
   if (opts.WorkFile ~= '') & exists(opts.WorkFile) then
      call cmd(um, 'LoadFile "' || opts.WorkFile || '"')
   return 0
/** SaveWorkFile *************************************************************/
SecureWorkFile:
   /* Save the work file after a change, but only in "secure" mode.
      Falls through into SaveWorkFile when def.Secure is set. */
   if ~def.Secure then return 0

SaveWorkFile:
   /* Ask UrlManager to write the url list to the work file.
      Does nothing when no work file was given; a failed save ends the
      script through DoFail. Returns 0. */
   if opts.WorkFile ~= '' then do
      if cmd(um, 'SaveFile FILE "' || opts.WorkFile || '" FULL') ~= 0 then
         call DoFail(10, "Error: Can't save workfile!?")
   end
   return 0
/** Realm Stuff **************************************************************/
ReadRealms:
   /* Load the authentication table from def.PasswordFile.
      Each useful line has the form  realm:value ; empty lines and lines
      starting with ';' are skipped.
      Sets: authtot        number of entries (0 when no file is configured)
            authr.1..n     realm names
            authu.1..n     text after the first ':' (handed to SetAuth
                           by GetUrl)
      Returns 0; an unreadable file ends the script through DoFail. */
   if def.PasswordFile='' then do
      authtot=0
      return 0
   end
   if ~open('r',def.PasswordFile,'r') then
      call DoFail(20, 'Error openning password file')
   rri=0
   do while ~eof('r')
      line = readln('r')
      if line='' then iterate
      if left(line,1)=';' then iterate
      rri=rri+1
      parse var line authr.rri':'authu.rri
   end
   /* Release the file: it used to stay open for the whole run */
   call close('r')
   authtot = rri
   drop rri line
   return 0
GetRealm:
   /* Look up a realm in the table filled by ReadRealms.
      Arg 1: realm name.
      Returns the stored authentication text of the first matching
      entry, or '' when the realm is unknown. */
   PARSE ARG gr.rlm
   gri = 1
   do while gri <= authtot
      if authr.gri = gr.rlm then return authu.gri
      gri = gri + 1
   end
   return ''
/** TheLoop ******************************************************************/
TheLoop:
   /* Process every queued url, then - only with the Retry option -
      every url flagged as failed. Returns 0. */
   call TheLoop2('Q')
   if opts.Retry then call TheLoop2('F')
   return 0
TheLoop2:
   /* Grab, one after the other, every HTTP url of the given type.
      Arg 1: url type to process ('Q' or 'F', see TheLoop).
      Stops when UrlManager finds no more url of that type or when one
      of the MaxCount / MaxTime / MaxBytes limits is reached.
      Returns 0.
      NOTE(review): redirections are followed without any hop limit, so
      two urls redirecting to each other would loop forever. */
   parse arg tl.type
   j = 1                /* UrlManager index the next search starts from */
   gu.lmoved = 0        /* 1 = last url was redirected: grab it again */
   do forever
      if gu.lmoved = 0 then do
         call cmd(um, 'Search 'j' type 'tl.type' pattern "HTTP://#?"')
         i = umres
         /* ARexx BREAK leaves only the innermost DO, i.e. this THEN
            group; the test right after the group ends the loop itself. */
         if i=0 then break                /* No more url */
         g.cnt = g.cnt+1
         j = i+1
         if g.cnt > 1 then call Delay(opts.TDelay)
      end
      if i=0 then break

      gu.lmoved = 0
      call GetUrl(i)

      if gu.lmoved = 1 then do
         /* GetUrl saw a redirection: point the entry at the new location
            and go round again without searching for another url. */
         call cmd(gh, 'GetHeaderString "Location:"')
         newurl = ''
         /* Take everything after the 9-character header name; strip()
            removes the optional blank(s) after the colon.  (Cutting a
            fixed 10 characters lost the first character of the url when
            no blank was sent and failed on a bare "Location:".) */
         if upper(left(ghres, 9)) = 'LOCATION:' then
            newurl = strip(substr(ghres, 10))
         if newurl ~= '' then
            call cmd(um, 'SetUrl ID 'i' URL "'newurl'"')
         else
            gu.lmoved = 0                 /* nothing usable to follow */
         drop newurl
      end
      else do
         if ((opts.MaxCount ~= 0 ) & (g.cnt >= opts.MaxCount)) then do
            call Log 'Received the maximal number of file allowed'
            return 0
         end

         /* Read the clock once so that test and message agree */
         tl.el = time('e')
         if ((opts.MaxTime ~= 0) & (opts.MaxTimeS <= tl.el)) then do
            tl.el = tl.el/60
            call Log 'Downloaded 'tl.el' mins, max was 'opts.MaxTime' mins'
            return 0
         end

         call cmd(gh, 'GetByteRecd')
         g.bytes = g.bytes + ghres
         if (opts.MaxBytes ~= 0) & (g.bytes >= opts.MaxBytes) then do
            call Log 'Downloaded 'g.bytes' bytes, max was 'opts.MaxBytes' bytes'
            return 0
         end
      end
   end
   return 0
/** GetUrl *******************************************************************/
GetUrl:
   /* Grab one url and record the outcome in UrlManager.
      Arg 1: UrlManager id of the url.
      Arg 2: optional; when non-empty the log line is tagged "(F)".
      Side effects:
        - logs one line describing what happened;
        - changes the type of the url (R, F, X, U) or removes it,
          depending on the result and on the def.Keep* settings;
        - sets gu.lmoved = 1 when the server answered with a redirection
          that the caller should follow;
        - when recursing, scans a received HTML file and queues the urls
          found in it;
        - saves the work file in secure mode.
      Fatal conditions (disk full, no memory, no socket, user abort with
      def.ExitOnClose) end the script through DoFail.
      Returns 0. */
   PARSE ARG gu.id, gu.isfailed

   if cmd(um, 'GetURL 'gu.id) ~= 0 then return 0
   gu.url = umres
   if cmd(um, 'GetDepth 'gu.id) ~= 0 then return 0
   gu.depth = umres

   /* Start the log line; it is completed further down */
   gu.out = '['right(g.cnt, 4)'] 'gu.url
   if gu.isfailed ~= '' then gu.out = gu.out' (F)'
   if opts.Recursive then gu.out = gu.out' (D: 'gu.depth')'
   call Log(gu.out'...', 1)

   if upper(left(gu.url,7)) ~= "HTTP://" then do
      call Log ' Not HTTP'
      if def.KeepUnused then call cmd(um, 'SetType 'gu.id' U')
      else call cmd(um, 'KillUrl 'gu.id)
      call SecureWorkFile
      return 0
   end

   /* Separate host & path */
   parse var gu.url gu.scheme '://' gu.hp '/' gu.path
   if gu.hp = '' then do
      call Log ' Not grabbed'
      if def.KeepUnused then call cmd(um, 'SetType 'gu.id' U')
      else call cmd(um, 'KillUrl 'gu.id)
      call SecureWorkFile
      return 0
   end

   /* Separate host & port */
   parse var gu.hp gu.host ':' gu.port
   if gu.host = '' then do
      call Log ' Not grabbed'
      if def.KeepUnused then call cmd(um, 'SetType 'gu.id' U')
      else call cmd(um, 'KillUrl 'gu.id)
      call SecureWorkFile
      return 0
   end
   if gu.port = '' then gu.port = 80

   /* Complete the name: a directory url is saved as index.html */
   if gu.path = '' then gu.path = 'index.html'
   else if right(gu.path, 1) = '/' then gu.path = gu.path || 'index.html'

   /* NoDirs: keep only what follows the last '/';
      otherwise store the file below a directory named after the host */
   if opts.NoDirs then do while index(gu.path, '/') ~= 0
      gu.path = right(gu.path, length(gu.path) - index(gu.path, '/'))
   end
   else gu.path = gu.host'/'gu.path

   /* Translates the filename */
   if def.Translate ~= '' then do
      call cmd(gh, 'Translate "'gu.path'" "'def.Translate'" "'def.TranslateTo'"')
      gu.path = ghres
   end

   /* Check if it already exists */
   if opts.NotExists & Exists(opts.SaveRoot||gu.path) then do
      call Log ' Already on disk'
      if def.KeepReceived then call cmd(um, 'SetType 'gu.id' R')
      else call cmd(um, 'KillUrl 'gu.id)
      call SecureWorkFile
      return 0
   end

   /* Build and send the download command */
   thecmd = 'GetHTTPFile "'gu.url'"'
   if def.Progress then thecmd = thecmd' Progress'
   if ~opts.NoDirs then thecmd = thecmd' FileName "'gu.path'"'
   if opts.MaxSize ~= 0 then thecmd = thecmd' MaxSize 'opts.MaxSize
   if opts.SaveHeaders then thecmd = thecmd' SaveHeader'
   if opts.IfModified then thecmd = thecmd' IfModified'
   if opts.HeaderOnly then thecmd = thecmd' HeaderOnly'
   if def.TouchOldDirs then thecmd = thecmd' TouchDirs'
   call cmd(gh, thecmd)
   gu.res = ghres

   /* Authorization check: result 100 means "look at the HTTP status".
      On a 401 try once more with the password stored for the realm.
      gu.res = 9999 marks "authentication impossible, already logged". */
   if gu.res = '100' then do
      call cmd(gh, 'GetHeaderString "HTTP/1."')
      parse var ghres dummy" "h.code" "h.msg
      if h.code = '401' then do
         call Log(' [Auth]', 1)
         call cmd(gh, 'GetHeaderString "WWW-Authenticate: Basic "')
         if RC=0 then do
            parse var ghres dummy '"'g.realm'"'
            g.authpwd = GetRealm(g.realm)
            if g.authpwd = '' then do
               call Log(' Unknown realm: "'g.realm'"', 1)
               gu.res = 9999
            end
            else do
               call cmd(gh, 'SetAuth "'g.authpwd'"')
               if RC~=0 then call DoFail(20, "Error: can't alloc mem for authentication!")
               call cmd(gh, thecmd)
               gu.res = ghres
               call cmd(gh, 'SetAuth')
            end
         end
         else do
            call Log(' Unknown authentication method!')
            /* Must be gu.res: the previous "gh.res" left gu.res at 100,
               so the url was then reported as "Bad user/password". */
            gu.res = 9999
         end
      end
   end

   /* Record the outcome */
   if gu.res = '0' then do
      if def.KeepReceived then call cmd(um, 'SetType 'gu.id' R')
      else call cmd(um, 'KillUrl 'gu.id)
      call Log(' Done.', 1)
   end
   else if gu.res = '200' then do
      call cmd(um, 'SetType 'gu.id' F')
      call Log(' Incomplete file.', 1)
   end
   else if gu.res = '9999' then nop
   else if gu.res = '100' then do
      /* The server answered with something else than plain success */
      call cmd(gh, 'GetHeaderString "HTTP/1."')
      parse var ghres dummy" "h.code" "h.msg
      if h.code = '304' then do
         call Log(' Not modified.', 1)
         if def.KeepReceived then call cmd(um, 'SetType 'gu.id' R')
         else call cmd(um, 'KillUrl 'gu.id)
      end
      else if h.code = '301' then do
         call Log(' Moved.', 1)
         if def.FollowMoved = '1' then gu.lmoved = 1
         else if def.FollowMoved = 'F' then call cmd(um, 'SetType 'gu.id' F')
         else if def.FollowMoved = 'X' then call cmd(um, 'SetType 'gu.id' X')
      end
      else if h.code = '302' then do
         call Log(' Moved temporarily.', 1)
         if def.FollowTMoved = '1' then gu.lmoved = 1
         else if def.FollowTMoved = 'F' then call cmd(um, 'SetType 'gu.id' F')
         else if def.FollowTMoved = 'X' then call cmd(um, 'SetType 'gu.id' X')
      end
      else if h.code = '401' then do
         call Log(' Bad user/password.', 1)
         call cmd(um, 'SetType 'gu.id' U')
      end
      else do
         call Log(' 'h.code' 'h.msg'.', 1)
         if def.KeepReceived then call cmd(um, 'SetType 'gu.id' X')
         else call cmd(um, 'KillUrl 'gu.id)
      end
   end
   else if gu.res='104' then do
      call Log(' Host not found.', 1)
      call cmd(um, 'SetType 'gu.id' X')
   end
   else if gu.res='107' then do
      call Log(' Disk Full.', 1)
      call DoFail(20, "Error Disk Full.")
   end
   /* (a second, unreachable test for result 200 used to sit here) */
   else if gu.res='201' then do
      call Log(' User Abort.', 1)
      if ~def.ExitOnClose then call cmd(um, 'SetType 'gu.id' F')
      else call DoFail(10, "Error: User abort.")
   end
   else if gu.res='97' then do
      call Log(' Failed to create socket.', 1)
      call DoFail(20, "Error.")
   end
   else if (gu.res='98') | (gu.res='99') | (gu.res='164') then do
      call Log(' Not enough memory.', 1)
      call DoFail(20, "Error.")
   end
   else do
      call Log(' Failed code 'gu.res'.', 1)
      if def.KeepFailed then call cmd(um, 'SetType 'gu.id' F')
      else call cmd(um, 'KillUrl 'gu.id)
   end

   /* Recursion: scan a freshly received HTML file for more urls.
      Every branch below also terminates the pending log line. */
   if gu.res == '0' & opts.Recursive then do
      call cmd(gh, 'GetHeaderString "Content-Type:"')

      /* HTML when the server says so, or when ".HTM" starts within the
         last five characters of the file name (.htm / .html).  The
         position must be non-zero: without that test any name shorter
         than five characters passed as HTML. */
      gu.htmpos = index(upper(gu.path), '.HTM')
      gu.ishtml = index(upper(ghres), 'TEXT/HTML') ~= 0
      if ~gu.ishtml then
         gu.ishtml = (gu.htmpos ~= 0) & (gu.htmpos > (length(gu.path)-5))
      drop gu.htmpos

      if gu.ishtml then do
         if gu.depth ~= 0 then do
            /* -1 means unlimited; otherwise children are one level down */
            if gu.depth = -1 then gu.newdepth = -1
            else gu.newdepth = gu.depth - 1

            thecmd = def.ScanHTML' "'opts.SaveRoot||gu.path'" "'def.TempFile'"'
            thecmd = thecmd' Base "'gu.url'" Pattern2 "'def.ParsePattern'" NoF# NoQuery'
            if opts.Pattern ~= '' then thecmd = thecmd' Pattern "'opts.Pattern'"'
            if opts.NoSrc then thecmd = thecmd' NoSrc'
            if opts.NoHRef then thecmd = thecmd' NoHRef'
            if opts.NoBG then thecmd = thecmd' NoBG'
            address command thecmd
            if RC ~= 0 then call Log ' (Can''t parse)'
            else do
               if cmd(um, 'ReadFile 'def.TempFile' Q Depth 'gu.newdepth) ~= 0 then do
                  call Log ' (Can''t add urls)'
               end
               else do
                  if umres = '0' then call Log ''
                  else if umres = '1' then
                     call Log ' (+1 url)'
                  else call Log ' (+'umres' urls)'
               end
            end
            call Delete(def.TempFile)
         end
         else call Log
      end
      else call Log
   end
   else call Log ''

   call SecureWorkFile
   return 0
/* Signal handlers. Each one reports the condition and the line it
   happened on (sigl) through oops, which ends the script via DoFail,
   so control never falls through to the next label. */
novalue:
   call oops("Novalue", sigl)
syntax:
   call oops("Syntax(RC="RC")", sigl, RC)
failure:
   call oops("Failure(RC="RC")", sigl)
ioerr:
   call oops("IOErr", sigl)
halt:
   call oops("Halt", sigl)
error:
   call oops("Error", sigl)

oops:
   /* Arg 1: name of the condition.
      Arg 2: source line where it was raised.
      Arg 3: optional error code; when given, its errortext() is added.
      Exits with code 40 through DoFail. */
   parse arg what, badline, code
   /* "~=" is the not-equal operator used throughout this file; the old
      "!=" is not an operator here ("!" is a symbol character), so the
      test was never true and the error text was never shown. */
   if code ~= '' then
      call DoFail(40, "ERR: Line "badline what errortext(code))
   else
      call DoFail(40, "ERR: Line "badline what)